if (!dir.exists("Spf66_calibration")) dir.create("Spf66_calibration")
if (!dir.exists("Metrics_of_interest")) dir.create("Metrics_of_interest")
RUN_ALL <- FALSE # flag whether or not to re-run fitting
# source code for model fitting
source("Model_calibration_code/clin_inf_likelihood_helper_func.R")
source("Model_calibration_code/Metropolis_Hastings_fit.R")
source("Model_calibration_code/simulate_clinical_recurrences.R")
# source code for metrics of epidemiological interest
source("Metrics_of_interest_code/metrics_under_stationary_hyp_reservoir.R")
source("Metrics_of_interest_code/classify_relapses.R")
source("Metrics_of_interest_code/relapses_per_bite.R")
# pre-processed data from SPf66 cohort for model fitting
patient_metadata <- read_rds("Spf66_data_cleaned/patient_metadata.rds")
inf_states_by_VIN <- read_rds("Spf66_data_processed/discretised_infection_matrix.rds")
SEASONALITY <- read_rds("Spf66_data_processed/seasonality_vector.rds") # inferred from Pf
N_COHORT <- nrow(patient_metadata)
keep_VIN <- as.character(patient_metadata$VIN)
# discretisation into uniform windows
START_DATE <- as.Date("1993-10-01")
END_DATE <- as.Date("1995-07-15")
STUDY_DURATION <- as.numeric(difftime(END_DATE, START_DATE, unit="days"))
TIME_STEP <- 10
N_OBS <- STUDY_DURATION%/%10
SHIFT_WINDOW <- 20 # estimate Pv FOI separately before and after this window
YEAR_IN_TIMESTEP <- 365%/%TIME_STEP
THRESHOLD_AGE <- 2*YEAR_IN_TIMESTEP # possible stratification in FORI above/below this age
# fixed covariates
PREL_BASELINE <- 0.4 # based on in vivo experiments for Chesson strain
N_AGES <- patient_metadata[names(inf_states_by_VIN), "AGE"]*YEAR_IN_TIMESTEP # in units of TIME_STEP
names(N_AGES) <- names(inf_states_by_VIN)
# number of chains and iterations for MCMC (nested parallelisation over chains)
N_CHAINS <- 4
N_ITER <- 100000
BURNIN_PROP <- 0.2
N_CORES_PER_CHAIN <- 8
# parameters for Metropolis-Hastings proposal
LAMBDA_PROP_SD <- 0.02/365
NU_PROP_SD <- 0.2
ETA_PROP_SD <- 1/2000
LOGIT_RHO_PROP_SD <- 0.05
LOG_GAMMA_PROP_SD <- 0.05
# hyperparameters for informative priors
LOG_GAMMA_PRIOR_SD <- 0.6
LOGIT_RHO_PRIOR_SD <- 0.7
# parameters for posterior predictive data
N_POSTERIOR_PREDICTIVE_DATASETS <- 2000
PROP_MASKED <- 0.5
PROPHYLAXIS_WINDOW <- 1
BUNCHING_WINDOW <- 2
# observed data for posterior predictive checking
masking_periods <- read_rds("Spf66_data_processed/masking_periods.rds")
observed_incidence_by_indiv <- read_rds("Spf66_data_processed/incidence_by_individual.rds")
observed_incidence_by_window <- read_rds("Spf66_data_processed/incidence_by_window.rds")
malaria_consultations_reconciled <- read_rds("Spf66_data_processed/malaria_consultations_reconciled.rds")
# formatting
POSTERIOR_COL <- "#cf7940"
OBSERVED_COL <- "#4878b8"
Baseline model: assumes geometrically-distributed sporozoite inocula, with the ratio of hypnozoites to immediately-developing forms informed by in vivo estimates for the Chesson strain and no age-stratification in the force of inoculation
if (RUN_ALL || !file.exists("Spf66_calibration/MCMC_results_main.rds")) {
set.seed("28091907")
# rescale LAMBDA2 to be the mean FOI in the second stage of the study
MCMC_results <- Metropolis_Hastings(inf_states_by_VIN, N_AGES, PREL_BASELINE, N_OBS, TIME_STEP,
SEASONALITY, SHIFT_WINDOW, THRESHOLD_AGE, 1,
N_CHAIN, N_ITER, N_CORES_PER_CHAIN,
LAMBDA_PROP_SD, NU_PROP_SD, ETA_PROP_SD,
LOGIT_RHO_PROP_SD, LOG_GAMMA_PROP_SD,
LOG_GAMMA_PRIOR_SD, LOGIT_RHO_PRIOR_SD) %>%
mutate(LAMBDA2=LAMBDA2*mean(tail(SEASONALITY, N_OBS)[(SHIFT_WINDOW+1):N_OBS]))
write_rds(MCMC_results, "Spf66_calibration/MCMC_results_main.rds")
} else {
MCMC_results <- read_rds("Spf66_calibration/MCMC_results_main.rds")
}
MCMC_posterior <- MCMC_results %>% subset(ITER>BURNIN_PROP*N_ITER) %>% mutate(CHAIN=as.character(CHAIN))
Below, we report posterior median estimates and 95% confidence intervals for various quantites of epidemiological interest.
| variable | median | LCI | UCI |
|---|---|---|---|
| Average duration of hypnozoite carriage (days) | 170.7949150 | 143.5511455 | 205.9326978 |
| Half-life of a hypnozoite batch (days) | 118.3860138 | 99.5020718 | 142.7416689 |
| Prob of activation in 2 week window | 0.0787001 | 0.0657240 | 0.0929214 |
| Prob of activation in 4 week window | 0.1512065 | 0.1271283 | 0.1772085 |
| Prob of activation in 12 week window | 0.3884863 | 0.3349548 | 0.4429818 |
| Prob of activation in 24 week window | 0.6260510 | 0.5577149 | 0.6897307 |
| Average sporozoite batch size | 6.8569487 | 5.4376382 | 8.7320378 |
| Average hypnozoite batch size | 2.7427795 | 2.1750553 | 3.4928151 |
| Prob of primary infection per bite | 0.8044648 | 0.7654005 | 0.8397236 |
| No primary but at least one relapse per bite | 0.0682593 | 0.0575230 | 0.0792631 |
| Prob at least 2 recurrences per bite | 0.7887810 | 0.7488923 | 0.8253521 |
| Relapse:primary ratio per bite | 3.4094461 | 2.8417219 | 4.1594818 |
| Prob relapse within 2 weeks of bite | 0.1768263 | 0.1464061 | 0.2160710 |
| Average force of inoculation in first stage of study (yearly) | 0.5717149 | 0.4703640 | 0.6901984 |
| Average force of inoculation in second stage of study (yearly) | 0.4956555 | 0.4001830 | 0.6047811 |
| Average force of prim inf in first stage of study (yearly) | 0.4593294 | 0.3847773 | 0.5419750 |
| Average force of prim inf in second stage of study (yearly) | 0.3979545 | 0.3266305 | 0.4809973 |
ETA_POSTERIOR=median(MCMC_posterior$ETA)
NU_POSTERIOR=median(MCMC_posterior$NU)
FORI_POSTERIOR <-
time_dependent_fori(median(MCMC_posterior$LAMBDA1),
median(MCMC_posterior$LAMBDA2)/
mean(tail(SEASONALITY, N_OBS)[(SHIFT_WINDOW+1):N_OBS]),
SHIFT_WINDOW, SEASONALITY)
AGES <- sort(unique(patient_metadata$AGE))
PCLIN_POSTERIOR <- sapply(AGES, function(a) {
median(calculate_pclin(a, MCMC_posterior$LOGIT_RHO, MCMC_posterior$LOG_GAMMA,
min(AGES), max(AGES)))})
names(PCLIN_POSTERIOR) <- AGES
We generate posterior predictive data under the model by sampling parameter combinations uniformly at random (without replacement) from the joint posterior. For comparability, we retain the age structure of the SPf66 cohort in addition to patterns of left/right-censoring and documented absences from the camp. The effects of post-exposure prophylaxis due to the treatment of symptomatic falciparum malaria is not accounted for.
MASKING_MATRIX <- masking_periods[["exc_prophylaxis"]] %>%
transmute(VIN=factor(VIN, levels=keep_VIN),
window_start=START%/%TIME_STEP+1, window_end=END%/%TIME_STEP+1,
prop_window_start=1-(START%%TIME_STEP)/TIME_STEP,
prop_window_end=(END%%TIME_STEP)/TIME_STEP) %>%
split(f=.$VIN) %>%
sapply(function(masking_windows) {
masking_windows <- masking_windows %>% dplyr::select(-VIN)
inf_state <- rep(1, N_OBS)
if (nrow(masking_windows)<1) return(inf_state)
# fully masked windows
fully_masked <- masking_windows %>% subset(window_end-window_start>1) %>%
apply(1, function(x) (x[1]+1):(x[2]-1))
if (is.list(fully_masked)) fully_masked <- do.call(c, fully_masked)
inf_state[fully_masked] <- NA
# calculate proportion of each window that is masked due to prophylaxis or absence
proportion_masked <- rep(0, N_OBS)
proportion_masked[masking_windows$window_start] <-
proportion_masked[masking_windows$window_start] + masking_windows$prop_window_start
proportion_masked[masking_windows$window_end] <-
proportion_masked[masking_windows$window_end] + masking_windows$prop_window_end
inf_state[which(proportion_masked>PROP_MASKED)] <- NA
return(inf_state)}) %>% t
write_rds(MASKING_MATRIX, "Spf66_data_processed/discretised_masking_matrix.rds")
Each simulated symptomatic episode is modeled to be treated with a long-lived antimalarial that gives rise to prophylactic masking for 1, and a potential bunching effect for the subsequent 2 windows (where each window spans 10 days).
set.seed("15051907")
posterior_draws <- MCMC_posterior[sample(1:nrow(MCMC_posterior), N_POSTERIOR_PREDICTIVE_DATASETS),]
MIN_AGE <- min(N_AGES)
MAX_AGE <- max(N_AGES)
posterior_predictive_cohorts <- lapply(1:N_POSTERIOR_PREDICTIVE_DATASETS, function(i) {
simulate_dataset(time_dependent_fori(posterior_draws[i, "LAMBDA1"],
posterior_draws[i, "LAMBDA2"]/mean(tail(SEASONALITY, N_OBS)[(SHIFT_WINDOW+1):N_OBS]),
SHIFT_WINDOW, SEASONALITY),
TIME_STEP, N_AGES, N_OBS, posterior_draws[i, "NU"], 1,
posterior_draws[i, "ETA"], PREL_BASELINE, PROPHYLAXIS_WINDOW, BUNCHING_WINDOW,
calculate_pclin(N_AGES, posterior_draws[i, "LOGIT_RHO"],
posterior_draws[i, "LOG_GAMMA"], MIN_AGE, MAX_AGE),
MASKING_MATRIX, 1, MIN_AGE)})
To evaluate model fit, we perform posterior predictive checks for age structure in the incidence of symptomatic vivax malaria, as well as time variation in incidence of symptomatic vivax malaria over the course of the study.
We calculate the incidence rate, stratified by age at enrolment, as the quotient of the total number of symptomatic malaria episodes recorded for an age group and the cumulative duration of clinical follow-up for the age group.
incidence_rate <- list()
incidence_rate[["Posterior predictive"]] <- sapply(posterior_predictive_cohorts, function(x) {
split(x, patient_metadata[rownames(x), "AGE"]) %>%
sapply(function(y) sum(y==1, na.rm = TRUE)/sum(!is.na(y))*365%/%TIME_STEP)}) %>%
apply(1, function(x) quantile(x, c(0.025, 0.5, 0.975))) %>%
t %>% as.data.frame %>% mutate(AGE=as.numeric(rownames(.)))
N_BOOTSTRAP_REPLICATES <- 2000
observed_incidence_bootstrap <- lapply(1:N_BOOTSTRAP_REPLICATES, function(i) {
observed_incidence_by_indiv[sample(1:N_COHORT, N_COHORT, replace=T),]})
incidence_rate[["Observed"]] <- lapply(observed_incidence_bootstrap, function(x) {
x %>% group_by(AGE) %>% summarise(mean_incidence=sum(N_Pv)/sum(Followup_Pv))}) %>%
bind_rows(.id="iter") %>%
group_by(AGE) %>%
summarise(`2.5%`=quantile(mean_incidence, 0.025),
`50%`=quantile(mean_incidence, 0.5),
`97.5%`=quantile(mean_incidence, 0.975))
Additionally, we consider empirical cumulative distribution functions for the incidence by individual, stratified by age group.
age_counts <- patient_metadata %>% group_by(AGE) %>% summarise(count=n()) %>%
mutate(count=factor(paste0(AGE, " years old\n(n=", count, ")"),
levels=paste0(2:15, " years old\n(n=", count, ")")))
indiv_incidence_simulated <- lapply(posterior_predictive_cohorts, function(x) {
data.frame(incidence=rowSums(x==1, na.rm=TRUE)/rowSums(!is.na(x))*(365%/%TIME_STEP),
AGE=patient_metadata[rownames(x), "AGE"]) %>%
arrange(AGE, incidence) %>% mutate(index=1:nrow(.))}) %>%
bind_rows(.id="iter") %>% group_by(AGE, index) %>%
summarise(median=median(incidence), LCI=quantile(incidence, 0.025),
UCI=quantile(incidence, 0.975)) %>% split(f=.$AGE) %>%
lapply(function(x) x %>% mutate(index=seq(1/nrow(.), 1, 1/nrow(.)))) %>%
bind_rows %>% merge(age_counts)
We compare the incidence by window for posterior predictive data against the observed incidence of symptomatic vivax malaria (calculated in 20 day windows, with a moving average across half-windows).
posterior_predictive_incidence_by_window <- lapply(posterior_predictive_cohorts, function(y) {
per_window <- colSums(y==1, na.rm = TRUE)/colSums(!is.na(y))
return(sapply(1:(length(per_window)-1),
function(i) mean(c(per_window[i], per_window[i+1]))))}) %>%
do.call(cbind, .) %>%
apply(1, function(x) quantile(x*YEAR_IN_TIMESTEP, c(0.025, 0.5, 0.975))) %>%
t %>% as.data.frame %>%
mutate(Date=days((1:(N_OBS-1))*TIME_STEP)+START_DATE)
For 2000 parameter combinations sampled uniformly at random from the posterior, we derive the likelihood of observing a clinical vivax recurrence in a specified window due to either spontaneous hypnozoite activation or reinfection event. In doing so, we condition on the history of vivax recurrence prior to the baseline falciparum episode for each child; in addition to potential masking in the window due to lapses in clinical follow-up (left- or right-censoring, or a documented absence from the camp) or post-exposure prophylaxis following the treatment of falciparum malaria. Given the resultant likelihood vector, we model the number of falciparum monoinfections followed by vivax within the specified windows to follow a Poisson binomial distribution (i.e. as a sum of independent, but non-identically distributed Bernoulli random variables).
POST_WINDOWS <- 2:8
Pv_treatment_masking <- malaria_consultations_reconciled %>%
subset(VIVAX==1 & VIVAX_MASKING>0) %>%
transmute(VIN=factor(VIN, levels=keep_VIN),
START=RelativeTime%/%TIME_STEP+2,
END=(TIME_STEP*(RelativeTime%/%TIME_STEP)+TIME_STEP/2+VIVAX_MASKING)%/%TIME_STEP)
FORI_posterior_subset <- mapply(time_dependent_fori,
posterior_draws$LAMBDA1,
posterior_draws$LAMBDA2/mean(tail(SEASONALITY, N_OBS)[(SHIFT_WINDOW+1):N_OBS]),
SHIFT_WINDOW, list(SEASONALITY), SIMPLIFY = F)
vivax_recur_prob_indiv <- function(my_VIN, WINDOW, POST_WINDOW) {
inf_states <- inf_states_by_VIN[[my_VIN]]
target_window <- WINDOW+(1:POST_WINDOW)
target_window <- subset(target_window, target_window<=N_OBS)
# mask any infections after the falciparum episode window
if (max(target_window)<N_OBS) {
inf_states[(max(target_window)+1):N_OBS] <- "M"
}
# unmask prophylaxis due to Pv in the target window
Pv_after_Pf_masking <- Pv_treatment_masking %>% subset(VIN==my_VIN & START>WINDOW+1)
Pv_after_Pf_masking <-
intersect(do.call(c, mapply(function(i, j) {i:j},
Pv_after_Pf_masking$START, Pv_after_Pf_masking$END,
SIMPLIFY = FALSE)), target_window)
if (length(Pv_after_Pf_masking)>0) inf_states[Pv_after_Pf_masking] <- "N"
# generate S vectors before and after baseline Pf
no_Pv_after_Pf <- masked_after_Pf <- inf_states
no_Pv_after_Pf[target_window] <- gsub("C|B", "N", no_Pv_after_Pf[target_window])
masked_after_Pf[target_window] <- "M"
if (all(inf_states[target_window]=="M")) return(rep(NA, N_POSTERIOR_PREDICTIVE_DATASETS))
Pv_after_Pf_observed <- any(inf_states[target_window]=="C")
S_vectors_no_Pv_after_Pf <- generate_S_vectors(list(no_Pv_after_Pf))
S_vectors_masked_after_Pf <- generate_S_vectors(list(masked_after_Pf))
likelihood_no_Pv_after_Pf <- mcmapply(inc_exc_likelihood,
S_vectors_no_Pv_after_Pf[["S_plus"]][1],
S_vectors_no_Pv_after_Pf[["S_minus"]][1],
patient_metadata[my_VIN, "AGE"]*YEAR_IN_TIMESTEP,
calculate_pclin(patient_metadata[my_VIN, "AGE"],
posterior_draws$LOGIT_RHO,
posterior_draws$LOG_GAMMA,
min(AGES), max(AGES)),
FORI_posterior_subset,
posterior_draws$ETA, posterior_draws$NU,
PREL_BASELINE, N_OBS, TIME_STEP, 1, 1)
likelihood_masked_after_Pf <- mcmapply(inc_exc_likelihood,
S_vectors_masked_after_Pf[["S_plus"]][1],
S_vectors_masked_after_Pf[["S_minus"]][1],
patient_metadata[my_VIN, "AGE"]*YEAR_IN_TIMESTEP,
calculate_pclin(patient_metadata[my_VIN, "AGE"],
posterior_draws$LOGIT_RHO,
posterior_draws$LOG_GAMMA,
min(AGES), max(AGES)),
FORI_posterior_subset,
posterior_draws$ETA, posterior_draws$NU,
PREL_BASELINE, N_OBS, TIME_STEP, 1, 1)
return(1-likelihood_no_Pv_after_Pf/likelihood_masked_after_Pf)
}
To gauge confounding due to seasonality, we consider the observed vs model-predicted rate of vivax recurrence in fixed windows following various time points in the SPf66 trial.
vivax_recur_cohort <- function(WINDOW, POST_WINDOW) {
#print(paste0(WINDOW, ", ", POST_WINDOW))
prob_per_window <- sapply(as.character(patient_metadata$VIN),
vivax_recur_prob_indiv, WINDOW, POST_WINDOW)
prob_per_window <- prob_per_window[,colMeans(apply(prob_per_window, 2, is.na))<1]
if (length(prob_per_window)==0 | is.null(dim(prob_per_window))) return(NULL)
N_partial_followup <- ncol(prob_per_window)
N_Pv <- length(unique((malaria_consultations_reconciled %>%
subset(VIVAX==1 & RelativeTime>TIME_STEP*WINDOW &
RelativeTime<TIME_STEP*(WINDOW+POST_WINDOW)))$VIN))
Pv_prop_vals <- seq(0, 1, 1/N_partial_followup)
Pv_prop_cdf <- apply(prob_per_window, 1, function(pp) {
pp <- subset(pp, pp>0 & pp<1)
return(poisbinom::ppoisbinom(length(pp)*Pv_prop_vals, pp))})
Pv_summary <- data.frame(Prop_Pv=Pv_prop_vals, CDF=rowMeans(Pv_prop_cdf),
WINDOW=WINDOW, POST_WINDOW=POST_WINDOW, observed=FALSE)
Pv_summary[N_Pv+1, "observed"] <- TRUE
return(Pv_summary)
}
if (RUN_ALL || !file.exists("Spf66_calibration/vivax_recur_cohort_pred.rds")) {
vivax_recur_cohort_pred <- mapply(vivax_recur_cohort,
rep(seq(0, 55, 5), each=length(POST_WINDOWS)),
rep(POST_WINDOWS, 12), SIMPLIFY = FALSE)
write_rds(vivax_recur_cohort_pred, "Spf66_calibration/vivax_recur_cohort_pred.rds")
} else {
vivax_recur_cohort_pred <- read_rds("Spf66_calibration/vivax_recur_cohort_pred.rds")
}
vivax_recur_CrI <-
data.frame(WINDOW=sapply(vivax_recur_cohort_pred, function(x) x[1, "WINDOW"]),
POST_WINDOW=sapply(vivax_recur_cohort_pred, function(x) x[1, "POST_WINDOW"])*TIME_STEP,
LCI=sapply(vivax_recur_cohort_pred, function(x) min(subset(x$Prop_Pv, x$CDF>=0.005))),
UCI=sapply(vivax_recur_cohort_pred, function(x) min(subset(x$Prop_Pv, x$CDF>=0.995))),
observed=sapply(vivax_recur_cohort_pred, function(x) subset(x$Prop_Pv, x$observed)[1]),
pvalue=sapply(vivax_recur_cohort_pred, function(x) subset(x$CDF, x$observed)[1])) %>%
mutate(BASELINE=paste0(START_DATE+days(TIME_STEP*WINDOW), " (day ", TIME_STEP*WINDOW, ")"))
Various studies have demonstrated an elevated risk of vivax malaria following febrile falciparum episodes. To mimic the structure of a typical study, we focus on the burden of vivax recurrence in fixed follow-up windows following each symptomatic falciparum monoinfection. We stratify baseline episodes based on treatment with artesunate monotherapy vs artesunate-mefloquine combination therapy, with nearest neighbour matching for the time of the baseline episode and the number of previously recorded vivax episodes.
falcip_monoinf <- malaria_consultations_reconciled %>%
mutate(WINDOW=RelativeTime%/%TIME_STEP+1) %>%
subset(VIVAX==0 & FALCIP==1 & Treatment %in% c("AS", "AS+MF") & WINDOW<N_OBS-max(POST_WINDOWS)-1 &
!near_treatment_failure) %>%
transmute(VIN=as.character(VIN), WINDOW=RelativeTime%/%TIME_STEP+1,
Treatment=Treatment, PfTime=RelativeTime,
Group=as.numeric(Treatment=="AS"))
falcip_monoinf$Prev_Pv_ep <- apply(falcip_monoinf, 1, function(x)
malaria_consultations_reconciled %>%
subset(as.character(VIN)==as.character(x[["VIN"]]) & VIVAX==1 &
RelativeTime<as.numeric(x[["PfTime"]])) %>% nrow)
falcip_monoinf$Next_Pv_ep <- apply(falcip_monoinf, 1, function(x) {
Pv_after_baseline <- malaria_consultations_reconciled %>%
subset(as.character(VIN)==as.character(x[["VIN"]]) & VIVAX==1 &
RelativeTime>as.numeric(x[["PfTime"]]))
if (nrow(Pv_after_baseline)==0) return(Inf)
return(min(Pv_after_baseline$RelativeTime)%/%TIME_STEP+1)})
match_falcip_monoinf_covariates <-
matchit(Group ~ Prev_Pv_ep + PfTime, data=falcip_monoinf,
distance="glm", method="nearest")
falcip_monoinf <-
match.data(match_falcip_monoinf_covariates, drop.unmatched = FALSE)
falcip_monoinf_groups <- list()
falcip_monoinf_groups[["AS"]] <- which(falcip_monoinf$Treatment=="AS")
falcip_monoinf_groups[["AS+MF"]] <- which(falcip_monoinf$Treatment=="AS+MF")
falcip_monoinf_groups[["AS+MF matched"]] <-
which(falcip_monoinf$Treatment=="AS+MF" & falcip_monoinf$weights==1)
names(falcip_monoinf_groups) <- paste0(names(falcip_monoinf_groups), " (n=",
sapply(falcip_monoinf_groups, length), ")")
vivax_recur_prob_windows <- lapply(POST_WINDOWS, function(W) {
mapply(vivax_recur_prob_indiv, falcip_monoinf$VIN, falcip_monoinf$WINDOW, W)})
#write_rds(vivax_recur_prob_windows, "vivax_recur_prob_windows.rds")
prop_pv_after_pf <- function(Pf_monoinf_subset) {
lapply(1:length(POST_WINDOWS), function(i) {
POST_WINDOW <- POST_WINDOWS[i]
# restrict attention to the subset of baseline episode with at least partial
# clinical follow-up in the window of interest
x <- vivax_recur_prob_windows[[i]][,Pf_monoinf_subset]
x <- x[,colMeans(apply(x, 2, is.na))<1]
if (length(x)==0) {
return(data.frame(Prop_Pv=NA, CDF=NA))
}
if (is.null(dim(x))) {
return(data.frame(Prop_Pv=NA, CDF=NA))
}
N_Pv <- sum(falcip_monoinf[Pf_monoinf_subset,]$Next_Pv_ep-
falcip_monoinf[Pf_monoinf_subset,]$WINDOW <= POST_WINDOW)
Pv_prop_vals <- seq(0, 1, 1/ncol(x))
Pv_prop_cdf <- apply(x, 1, function(pp) {
pp <- subset(pp, pp>0 & pp<1)
return(poisbinom::ppoisbinom(length(pp)*Pv_prop_vals, pp))})
Pv_prop_summary <- data.frame(Prop_Pv=Pv_prop_vals, CDF=rowMeans(Pv_prop_cdf),
POST_WINDOW=POST_WINDOW, observed=FALSE)
Pv_prop_summary[N_Pv+1, "observed"] <- TRUE
return(Pv_prop_summary)}) %>% bind_rows()
}
prop_pv_after_pf_pred <- lapply(falcip_monoinf_groups, prop_pv_after_pf)
prop_pv_after_pf_CrI <- lapply(prop_pv_after_pf_pred, function(x) {
x %>% split(f=.$POST_WINDOW) %>%
lapply(function(x) c(POST_WINDOW=x[1, "POST_WINDOW"],
LCI=min(subset(x$Prop_Pv, x$CDF>=0.005)),
UCI=min(subset(x$Prop_Pv, x$CDF>=0.995)),
observed=subset(x$Prop_Pv, x$observed)[1],
pvalue=subset(x$CDF, x$observed)[1])) %>% bind_rows()
}) %>% bind_rows(.id="Group")
| Group | POST_WINDOW | LCI | UCI | observed | pvalue |
|---|---|---|---|---|---|
| AS (n=120) | 2 | 0.0180180 | 0.1351351 | 0.1081081 | 2.422e-02 |
| AS (n=120) | 3 | 0.0438596 | 0.1929825 | 0.2807018 | 1.044e-07 |
| AS (n=120) | 4 | 0.0683761 | 0.2393162 | 0.3589744 | 1.162e-09 |
| AS (n=120) | 5 | 0.1016949 | 0.2711864 | 0.4322034 | 1.098e-11 |
| AS (n=120) | 6 | 0.1271186 | 0.3050847 | 0.4830508 | 1.630e-12 |
| AS (n=120) | 7 | 0.1440678 | 0.3389831 | 0.5000000 | 2.050e-11 |
| AS (n=120) | 8 | 0.1694915 | 0.3644068 | 0.5254237 | 2.520e-11 |
| AS+MF (n=607) | 4 | 0.0276243 | 0.0755064 | 0.0276243 | 9.943e-01 |
| AS+MF (n=607) | 5 | 0.0588235 | 0.1193772 | 0.0813149 | 6.617e-01 |
| AS+MF (n=607) | 6 | 0.0870307 | 0.1569966 | 0.1467577 | 2.415e-02 |
| AS+MF (n=607) | 7 | 0.1118644 | 0.1881356 | 0.2067797 | 8.982e-05 |
| AS+MF (n=607) | 8 | 0.1336717 | 0.2148900 | 0.2487310 | 1.545e-06 |
| AS+MF matched (n=120) | 4 | 0.0098039 | 0.1176471 | 0.0196078 | 9.224e-01 |
| AS+MF matched (n=120) | 5 | 0.0275229 | 0.1743119 | 0.1009174 | 3.289e-01 |
| AS+MF matched (n=120) | 6 | 0.0535714 | 0.2142857 | 0.1875000 | 2.879e-02 |
| AS+MF matched (n=120) | 7 | 0.0789474 | 0.2543860 | 0.2631579 | 1.407e-03 |
| AS+MF matched (n=120) | 8 | 0.1052632 | 0.2807018 | 0.2982456 | 1.113e-03 |
We now derive metrics of epidemiological interest, predicated on our posterior median estimates.
Here, we consider the size of the hypnozoite reservoir, and the time to spontaneous clearance of the hypnozoite reservoir (with a threshold probability) assuming mosquito-to-human transmission is temporarily interrupted (e.g. due to widespread administration of ivermectin).
Assumption: hypnozoite reservoir has reached stationarity under a constant force of inoculation.
# Size of the hypnozoite reservoir at stationarity
HMAX <- 11
TIMES <- seq(0, 730, 0.05)
hyp_tail_dist <- sapply(COARSE_LAMBDA_VALS, function(x) {
hyp_reservoir_stationary_cdf(HMAX, ETA_POSTERIOR, NU_POSTERIOR, PREL_BASELINE, x)}) %>%
as.data.frame %>% mutate(n_hyp=0:HMAX) %>% reshape2::melt(id="n_hyp") %>%
transmute(n_hyp=n_hyp+1, tail_prob=1-value,
lambda=COARSE_LAMBDA_VALS[as.numeric(gsub("V", "", variable))]*365)
# Time to spontaneous clearance of the hypnozoite reservoir
# (assuming mosquito-to-human transmission temporarily interrupted)
THRESHOLD_VALS <- c(0.8, 0.85, 0.9, 0.95, 0.96, 0.97, 0.98, 0.99)
clearance_time <- lapply(THRESHOLD_VALS, function(THRESHOLD)
data.frame(LAMBDA=FINE_LAMBDA_VALS*365, THRESHOLD=THRESHOLD,
Clearance_Prob_Time=sapply(FINE_LAMBDA_VALS, function(x)
time_to_clear_hyp(THRESHOLD, ETA_POSTERIOR, NU_POSTERIOR, PREL_BASELINE, x)))) %>%
bind_rows()
At a force of inoculation of 0.5 bites per year, only 27% of people
are predicted to carry hypnozoites; however, to ensure spontaneous
hypnozoite clearance with probability 0.98, mosquito-to-human
transmission would need to be interrupted for 1.6 years. `
Here, we consider the predictive value of recent bloodstream infection as a predictor of hypnozoite carriage.
Assumption: hypnozoite reservoir has reached stationarity under a constant force of inoculation.
SEROTAT_WINDOW <- 270
sensitivity_specificity_recent_recur <- sapply(FINE_LAMBDA_VALS, function(LAMBDA)
recent_recur_accuracy_stationary(SEROTAT_WINDOW, ETA_POSTERIOR, NU_POSTERIOR, PREL_BASELINE, LAMBDA)) %>%
t %>% as.data.frame %>%
transmute(LAMBDA=FINE_LAMBDA_VALS*365, Hyp_Carriage=Hyp_Carriage,
Recent_Recur=Sensitivity*Hyp_Carriage/Recent_Recurrence,
No_Recent_Recur=1-Specificity*(1-Hyp_Carriage)/(1-Recent_Recurrence)) %>%
reshape2::melt(id="LAMBDA")
At a force of inoculation of 0.5 infectious bites per year, individuals who have had recent recurrences are 2 as likely to harbour hypnozoites than randomly-sampled individuals; for 2 bites year, they are only 1.1 times more likely to harbour hypnozoites. The false omission rate (conditional probability of hypnozoite carriage given no recent recurrences) is generally low but rises to 18% under a force of inoculation of 2 infectious bites per year.
Here, we perform a theoretical comparison of MSAT vs MDA, whilst accommodating population heterogeneity in the form of a Gamma-distributed force of inoculation (Smith et al, 2005).
Assumptions: for each individual, the hypnozoite reservoir has reached stationarity under a force of inoculation sampled from a uniform distribution; the outcome of the serological test given recent recurrence is conditionally independent to hypnozoite carriage.
Note: the expression for the proportion of hypnozoite carriers who are correctly-targeted is formulated in terms of the Hurwitz-Zeta function; we have computed this expression using Mathematica, since there is no available implementation in R.
SEROTAT_WINDOW <- 270
SEROTAT_SPEC <- 0.8
SEROTAT_SENS <- 0.8
THETA_VALS <- 10^seq(-7, 0, 0.01)
serotat_specificity_het <-
lapply(COARSE_LAMBDA_VALS, function(LAMBDA_MEAN) {
data.frame(THETA=THETA_VALS, LAMBDA=LAMBDA_MEAN*365,
Specificity=sapply(THETA_VALS, function(THETA) {
recent_recur_specificity_stationary_het(SEROTAT_WINDOW, ETA_POSTERIOR, NU_POSTERIOR,
PREL_BASELINE, LAMBDA_MEAN, THETA)}))}) %>%
bind_rows() %>% mutate(PROP_TOP_20=prop_bites_top_20(THETA, LAMBDA/365),
GINI_COEFF=acid::gini.gamma(THETA))
# Hurwitz-zeta function (with shift!=1) not implemented in R, sensitivity
# in the presence of heterogeneity computed in Mathematica; numerical overflow
# and loss of precision if theta too small
param_vals <- expand.grid(SEROTAT_WINDOW=SEROTAT_WINDOW, ETA=ETA_POSTERIOR, NU=NU_POSTERIOR,
PREL=PREL_BASELINE, LAMBDA=COARSE_LAMBDA_VALS, THETA=THETA_VALS) %>%
subset(THETA>=10^-4)
write.csv(param_vals, file="Metrics_of_interest_code/recent_recur_sensitivity_stationary_het_params.csv", row.names = FALSE)
recent_recur_sensitivity_het <-
read.delim("Metrics_of_interest_code/recent_recur_sensitivity_stationary_het_Mathematica.dat", header=FALSE)
serotat_sensitivity_het <-
bind_cols(param_vals[, c("LAMBDA", "THETA")], recent_recur_sensitivity_het) %>%
transmute(PROP_TOP_20=prop_bites_top_20(THETA, LAMBDA), LAMBDA=LAMBDA*365, Sensitivity=V1)
serotat_sensitivity <-
data.frame(LAMBDA=COARSE_LAMBDA_VALS*365, PROP_TOP_20=0.2,
Sensitivity=sapply(COARSE_LAMBDA_VALS, function(x)
recent_recur_accuracy_stationary(SEROTAT_WINDOW, ETA_POSTERIOR, NU_POSTERIOR, PREL_BASELINE, x)[["Sensitivity"]]))
serotat_sensitivity_het <- bind_rows(serotat_sensitivity_het, serotat_sensitivity)
In a population where individuals are bitten once a year on average, but 20% of individuals receive 80% of all infective bites (Gini coefficient 0.985), we predict serological MSAT to yield a 2.8 fold reduction in overtreatment relative to MDA (in the eligible population).
Here, we assume that an individual has a baseline recurrence on day 1 and consider both the time to the secondary recurrence, and the probability that the baseline and secondary recurrences are derived from different sporozoite batches.
Assumption: hypnozoite reservoir has reached stationarity under a constant force of inoculation.
# discretised in time steps of days
N_VALS <- 2:250
baseline_to_secondary_recurrence <-
lapply(COARSE_LAMBDA_VALS, function(LAMBDA) {
data.frame(LAMBDA=LAMBDA*365, time=c(0, N_VALS-1),
cdf_next_recur=cumsum(c(0, conditional_prob_next_recurrence(N_VALS, 1, LAMBDA,
NU_POSTERIOR, PREL_BASELINE, ETA_POSTERIOR))),
prob_same_batch=c(NA, conditional_prob_same_batch(N_VALS, 1, LAMBDA,
NU_POSTERIOR, PREL_BASELINE, ETA_POSTERIOR)))}) %>% bind_rows
Under a force of inoculation of 0.5 bites per year, we predict 38% of baseline recurrences to be followed by a second recurrence within 28 days of follow-up, with this figure rising to 55% under a more intense force of inoculation of 2 bites per year.
Given a secondary recurrence occurs precisely 28 days after a baseline recurrence, we preiduct that it is derived from a different sporozoite batch with probability 0.22 under a force of inoculation of 0.5 bites per year, but a substantially higher probability of 0.55 under a force of inoculation of 2 bites per year.
Granular analysis of the risk of vivax malaria following symptomatic falciparum malaria is confounded by seasonality. As a baseline compartor, we derive the force of inoculation required to yield spontaneous recurrence with a given probability in any defined window of follow-up, accounting for antidisease masking (but without adjusting for post-exposure prophylaxis).
Ignoring the effects of seasonality and post-exposure prophylaxis, we can derive the force of inoculation required to yield recurrence with a given probability over a period of clinical follow-up.
Assumption: hypnozoite reservoir has reached stationarity under a constant force of inoculation.
background_recurrence <-
expand.grid(W=c(30, 60, 90), p_recur=seq(0, 0.5, 0.01), PCLIN=seq(0.5, 1, 0.1)) %>%
mutate(FORI=fori_required_for_recur(W, p_recur, NU_POSTERIOR, PREL_BASELINE, ETA_POSTERIOR, PCLIN)*365,
W=paste0(W, " days of follow-up"))
To ensure spontaneous recurrence with probability 0.14 over 60 days of follow-up, an average of 0.3652257 bites per year would be required; adjusting for antidisease masking, with each hypnozoite activation and immediate sporozoite development event giving rise to clinical symptoms with probability 0.75, this this figure rises to an average of 0.4533424 bites per year. To explain the corresponding proportion of 0.42 over 60 days of follow-up after rapidly-eliminated antimalarial treatment, we would instead require 1.319086 bites per year (ignoring antidisease masking) or 1.6373373 bites per year in the setting where each hypnozoite activation and immediate sporozoite development event giving rise to clinical symptoms with probability 0.75.
Here, we consider the inter-relapse times following a single infective bite (with a geometrically-distributed hypnozoite inoculum), with the assumption that a hypnozoite activation event gives rise to a detectable relapse if and only if it occurs at least T_MASK days after the previous detectable relapse.
T_MASK <- 10
relapses_per_bite <- detectable_relapses_per_bite(NU_POSTERIOR, PREL_BASELINE, ETA_POSTERIOR, T_MASK)
inter_relapse_cdf_per_bite <- inter_relapse_cdfs(NU_POSTERIOR, PREL_BASELINE, ETA_POSTERIOR, T_MASK, seq(0, 300, 2))
| n_relapses | activated | detected |
|---|---|---|
| 1 | 0.7328189 | 0.7328189 |
| 2 | 0.5370235 | 0.5285090 |
| 3 | 0.3935410 | 0.3748590 |
| 4 | 0.2883943 | 0.2612981 |
| 5 | 0.2113408 | 0.1788722 |
| 6 | 0.1548745 | 0.1201619 |
| 7 | 0.1134950 | 0.0791551 |
| 8 | 0.0831712 | 0.0510911 |
| 9 | 0.0609495 | 0.0322868 |
| 10 | 0.0446649 | 0.0199605 |
| 11 | 0.0327313 | 0.0120624 |
| 12 | 0.0239861 | 0.0071196 |
| 13 | 0.0175775 | 0.0041009 |
| 14 | 0.0128811 | 0.0023032 |
| 15 | 0.0094395 | 0.0012604 |
| 16 | 0.0069175 | 0.0006712 |
Ignoring any masking of hypnozoite activation events in quick succession, we also calculate the coefficient of variation (CV) for the mth inter-relapse times following a single infective bite. We can show that the CV is independent of m.
CV <- sapply(MCMC_posterior$NU,
function(NU) sqrt(2*NU*PREL_BASELINE*HMMcopula::dilog(1/(1+NU*PREL_BASELINE)) -
log(1+NU*PREL_BASELINE)^2)/log(1+NU*PREL_BASELINE))
We estimate this CV to be 1.41 (95% CrI 1.35 to 1.48).
Here, we derive the probability that each recorded clinical recurrence is a relapse (defined to be the probability that it is caused by hypnozoite activation event(s) only). In deriving this probability, we account for the effects of prophylactic bunching, and condition on all observed inter-recurrence intervals, both before and after the target episode. Unlike other metrics, this classification additionally relies on posterior estimates for the force of inoculation (including seasonality) and the age-dependent probability of symptomatic infection.
PCLIN_VALS <- PCLIN_POSTERIOR[as.character(N_AGES/YEAR_IN_TIMESTEP)]
names(PCLIN_VALS) <- names(N_AGES)
relapse_classfication <-
classify_relapses(inf_states_by_VIN, N_AGES, PCLIN_VALS, FORI_POSTERIOR,
ETA_POSTERIOR, NU_POSTERIOR, PREL_BASELINE)
We visualise probabilistic classifications for all 7 year olds with at least two recorded recurrences below. Crosses indicate recorded falciparum infections; light grey bars indicate masking (due to left- or right-censoring, a documented absence from the camp or post-exposure prophylaxis); and dark grey boxes indicate recurrences which could not be probabilistically classified due to numerical errors.